# Data ----
# Download the NJ county boundary file (state open-data portal) and read it
# into an sf object via geojsonsf. Leaves `NJ_Counties` in the workspace.
url <- "https://opendata.arcgis.com/datasets/5f45e1ece6e14ef5866974a7b57d3b95_1.geojson"
file <- "NJ_counties.geojson"
download.file(url, file)
rm(url)
# Convert the downloaded GeoJSON to an sf object
# (dropped the redundant second `file <- ...` assignment from the original)
NJ_Counties <- geojson_sf(file)
rm(file)
# Clean Data ----
# Keep only the columns needed downstream, renaming to analysis-friendly
# names. Bare column names in transmute() (CO, Shape_Length, ...) keep the
# column unchanged — equivalent to the explicit `CO = CO` form.
NJ_Counties_Cleaned <-
  NJ_Counties %>%
  transmute(
    county = COUNTY,
    CO,
    pop = POP2010,
    popdensity = POPDEN2010,
    Shape_Length,
    Shape_Area,
    GNIS
  )
# Get the rendered page source from the hospital-directory website.
# The directory is JavaScript-rendered, hence RSelenium rather than a
# plain read_html() on the URL.
gc()
# Spawn a Selenium server + Firefox client on a fixed local port.
# NOTE(review): the server/browser is never stopped in this chunk —
# consider remote_driver$close() / driver$server$stop() after scraping.
driver <- rsDriver(browser = c("firefox"), port = 44454L)
remote_driver <- driver[["client"]]
remote_driver$navigate("https://www.childrens-specialized.org/locations-directory/?")
page <- remote_driver$getPageSource()
# Retrieve name, address, and per-weekday opening hours for each of the 15
# facilities in the directory, producing one row per facility in `Hosinfo`.
Xpathgen1 <- "/html/body/div[1]/div/div/div[2]/div/div[2]/div["
Xpathgen2 <- "]/div/div[2]/article"
n_locations <- 15L  # facility count on the page — TODO confirm it is stable
rows <- vector("list", n_locations)  # preallocate; avoids rbind-in-loop growth
for (i in seq_len(n_locations)) {
  XPath <- paste0(Xpathgen1, i, Xpathgen2)
  Node <- page[[1]] %>%
    read_html() %>%
    html_nodes(xpath = XPath)
  # Facility name lives in the article's <h2>
  name <-
    Node[[1]] %>%
    html_node("h2") %>%
    html_text()
  # Address in the first <h3>; collapse newline + indentation into spaces
  address <-
    Node[[1]] %>%
    html_node("h3") %>%
    html_text() %>%
    gsub(pattern = "\n *", replacement = " ", x = .)
  # Hours keyed by weekday, parsed from each child div's "<Day>-Hours" class.
  # A fresh list per facility fixes the original's assign()-to-globals, which
  # could silently carry a previous facility's hours forward.
  hours <- list()
  for (j in seq_len(7L)) {  # j, not i: the original shadowed the outer index
    XPathday <- paste0(XPath, "/div[", j, "]")
    day <- page[[1]] %>%
      read_html() %>%
      html_nodes(xpath = XPathday) %>%
      html_attr("class") %>%
      grep("-Hours", x = ., value = TRUE) %>%
      gsub("-Hours", "", x = .)
    times <-
      page[[1]] %>%
      read_html() %>%
      html_nodes(xpath = XPathday) %>%
      html_node("h3") %>%
      html_text()
    hours[[day]] <- times
  }
  # Splice the named hours list in as columns, in fixed weekday order
  rows[[i]] <- data.frame(
    name, address,
    hours[c("Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday")]
  )
}
Hosinfo <- do.call(rbind, rows)
# Write csv file.
# row.names = FALSE prevents the re-read at the wrangling step from gaining
# a spurious auto-numbered "X" column.
write.csv(Hosinfo, "Hospitals.csv", row.names = FALSE)
# Data Wrangling ----
# Geocode each facility address and write the lat/lon table to disk.
Hosinfo <- read.csv("Hospitals.csv")
# (Removed the unused time-range regex `pattern` and its rm() — it was
# defined but never applied anywhere in this script.)
Hosloc <-
  Hosinfo %>%
  select(name, address) %>%
  mutate_geocode(address) # ggmap; requires a registered Google API key
write.csv(Hosloc, "Hospitalsloc.csv")